General

Alignment summary

datadir="/Volumes/la420/home/CUTnTAG/antibodies/bowtie2_summary/"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")

## collect the alignment results from the bowtie2 alignment summary files
alignResult = c()
for(sample in sampleList){
  alignRes = read.table(paste0(datadir, sample, "_trimmed_bowtie2.txt"), header = FALSE, fill = TRUE)
  alignRate = substr(alignRes$V1[6], 1, nchar(as.character(alignRes$V1[6]))-1)
  alignResult = data.frame(Antibody = sample, SequencingDepth = alignRes$V1[1] %>% as.character %>% as.numeric, 
                           MappedFragNum_hg19 = alignRes$V1[4] %>% as.character %>% as.numeric + alignRes$V1[5] %>% as.character %>% as.numeric,
                           AlignmentRate_hg19 = alignRate %>% as.numeric)  %>% rbind(alignResult, .)
}
alignResult %>% mutate(AlignmentRate_hg19 = paste0(AlignmentRate_hg19, "%"))

Duplicates

## summarize the duplication information from the picard summary outputs
picarddir = "/Volumes/la420/home/CUTnTAG/antibodies/picard_summary/"

dupResult = c()
for(sample in sampleList){
  dupRes = read.table(paste0(picarddir, sample, "_picard.rmDup.txt"), header = TRUE, fill = TRUE)
  dupResult = data.frame(Antibody = sample, MappedFragNum_hg19 = dupRes$READ_PAIRS_EXAMINED[1] %>% as.character %>% as.numeric, DuplicationRate = dupRes$PERCENT_DUPLICATION[1] %>% as.character %>% as.numeric * 100, EstimatedLibrarySize = dupRes$ESTIMATED_LIBRARY_SIZE[1] %>% as.character %>% as.numeric) %>% mutate(UniqueFragNum = MappedFragNum_hg19 * (1-DuplicationRate/100))  %>% rbind(dupResult, .)
}

alignDupSummary = left_join(alignResult, dupResult, by = c("Antibody", "MappedFragNum_hg19")) %>% mutate(DuplicationRate = paste0(DuplicationRate, "%"))
alignDupSummary

Fragment length

Without duplicates

## collect the fragment size information
fragdir = "/Volumes/la420/home/CUTnTAG/antibodies/fragmentLen/"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")

fragLen = c()
for(sample in sampleList){
  fragLen = read.table(paste0(fragdir, sample, "_rmDup_fragmentLen.txt"), header = FALSE) %>% mutate(fragLen = V1 %>% as.numeric, fragCount = V2 %>% as.numeric, Weight = as.numeric(V2)/sum(as.numeric(V2)), Antibody = sample) %>% rbind(fragLen, .) 
}

fig5A = fragLen %>% ggplot(aes(x = Antibody, y = fragLen, weight = Weight, fill = Antibody)) +
    geom_violin(bw = 5) +
    scale_y_continuous(breaks = seq(0, 800, 50)) +
    scale_fill_viridis(discrete = TRUE, begin = 0.1, end = 0.9, option = "magma", alpha = 0.8) +
    scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9) +
    theme_bw(base_size = 20) +
    ggpubr::rotate_x_text(angle = 20) +
    ylab("Fragment Length") +
    xlab("")

fig5B = fragLen %>% ggplot(aes(x = fragLen, y = fragCount, color = Antibody, group = Antibody)) +
  geom_line(size = 1) +
  scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9, option = "magma") +
  theme_bw(base_size = 20) +
  xlab("Fragment Length") +
  ylab("Count") +
  coord_cartesian(xlim = c(0, 500))

ggarrange(fig5A, fig5B, ncol = 2)

With duplicates

## collect the fragment size information
fragdir = "/Volumes/la420/home/CUTnTAG/antibodies_withDup/fragmentLen/"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")

fragLen = c()
for(sample in sampleList){
  fragLen = read.table(paste0(fragdir, sample, "_withDup_fragmentLen.txt"), header = FALSE) %>% mutate(fragLen = V1 %>% as.numeric, fragCount = V2 %>% as.numeric, Weight = as.numeric(V2)/sum(as.numeric(V2)), Antibody = sample) %>% rbind(fragLen, .) 
}

fig5A = fragLen %>% ggplot(aes(x = Antibody, y = fragLen, weight = Weight, fill = Antibody)) +
    geom_violin(bw = 5) +
    scale_y_continuous(breaks = seq(0, 800, 50)) +
    scale_fill_viridis(discrete = TRUE, begin = 0.1, end = 0.9, option = "viridis", alpha = 0.8) +
    scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9) +
    theme_bw(base_size = 20) +
    ggpubr::rotate_x_text(angle = 20) +
    ylab("Fragment Length") +
    xlab("")

fig5B = fragLen %>% ggplot(aes(x = fragLen, y = fragCount, color = Antibody, group = Antibody)) +
  geom_line(size = 1) +
  scale_color_viridis(discrete = TRUE, begin = 0.1, end = 0.9, option = "viridis") +
  theme_bw(base_size = 20) +
  xlab("Fragment Length") +
  ylab("Count") +
  coord_cartesian(xlim = c(0, 500))

ggarrange(fig5A, fig5B, ncol = 2)

SEACR

1. Import peaks

peakdir_S = "/Volumes/la420/home/CUTnTAG/antibodies/SEACR"

Diagenode_50x_path = file.path(peakdir_S, "Diagenode_50x_seacr_top0.01.peaks.stringent.bed")
Diagenode_50x = ChIPseeker::readPeakFile(Diagenode_50x_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)

Abcam_ab4729_path = file.path(peakdir_S, "Abcam-ab4729_seacr_top0.01.peaks.stringent.bed")
Abcam_ab4729 = ChIPseeker::readPeakFile(Abcam_ab4729_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)

Diagenode_100x_path = file.path(peakdir_S, "Diagenode_100x_seacr_top0.01.peaks.stringent.bed")
Diagenode_100x = ChIPseeker::readPeakFile(Diagenode_100x_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)

ActiveMotif_path = file.path(peakdir_S, "ActiveMotif_seacr_top0.01.peaks.stringent.bed")
ActiveMotif = ChIPseeker::readPeakFile(ActiveMotif_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)

Abcam_ab177178_path = file.path(peakdir_S, "Abcam-ab177178_seacr_top0.01.peaks.stringent.bed")
Abcam_ab177178 = ChIPseeker::readPeakFile(Abcam_ab177178_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)

2. Peak information

Peak numbers

peakdir_S = "/Volumes/la420/home/CUTnTAG/antibodies/SEACR/"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")

Total_CT_peaks = c()
for(sample in sampleList){
    peakInfo_S = read.table(paste0(peakdir_S, sample, "_seacr_top0.01.peaks.stringent.bed"), header = FALSE, fill = TRUE) %>% mutate(width = abs(V3-V2))
    Total_CT_peaks = data.frame(Total_CT_peaks = nrow(peakInfo_S), Antibody = sample) %>% rbind(Total_CT_peaks, .)
 }

Total_CT_peaks %>% dplyr::select(Antibody, Total_CT_peaks)

Peak and sequencing/alignment results

PeaksAlignmentSummary = left_join(Total_CT_peaks, alignDupSummary, by = "Antibody")
PeaksAlignmentSummary = PeaksAlignmentSummary[c("Antibody", "SequencingDepth", "MappedFragNum_hg19", "UniqueFragNum", "DuplicationRate", "Total_CT_peaks")]
PeaksAlignmentSummary
ggplot(PeaksAlignmentSummary, aes(x = SequencingDepth, y = Total_CT_peaks)) + geom_point(aes(size = DuplicationRate)) + geom_text(aes(label = Antibody), nudge_y = 500, hjust = "inward")

Peak widths

samples = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)

for(sample in samples){
  print(GenomicRanges::width(sample) %>% summary())
}
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      74     703     917    1048    1188   24950 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      43    1046    1339    1532    1758   25072 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     106    1720    2256    2696    3136   65394 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     177    1106    1393    1653    1832   23074 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     195    2075    2736    3231    3740   66803

Fraction of reads in peaks

datadir="/Volumes/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")

inPeakData = c()
for(sample in sampleList){
  peakRes = read.table(paste0(datadir, "/SEACR/", sample, "_seacr_top0.01.peaks.stringent.bed"), header = FALSE, fill = TRUE)
  peak.gr = GenomicRanges::GRanges(seqnames = peakRes$V1, IRanges(start = peakRes$V2, end = peakRes$V3), strand = "*")
  bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
  fragment_counts = getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
  inPeakN = counts(fragment_counts)[,1] %>% sum
  inPeakData = rbind(inPeakData, data.frame(inPeakN = inPeakN, Antibody = sample))
}

frip = left_join(alignDupSummary, inPeakData, by = "Antibody") %>% mutate(frip = inPeakN/UniqueFragNum * 100)
frip %>% dplyr::select(Antibody, SequencingDepth, MappedFragNum_hg19, AlignmentRate_hg19, UniqueFragNum, FragInPeakNum = inPeakN, FRiPs = frip)

3. Coverage

Remove largest peaks (in all cases, under 30 peaks per sample):

ActiveMotif_adj = data.frame(ActiveMotif)
ActiveMotif_adj = ActiveMotif_adj[ActiveMotif_adj$V4 < 50000, ] %>% GenomicRanges::GRanges()

Diagenode_100x_adj = data.frame(Diagenode_100x)
Diagenode_100x_adj = Diagenode_100x_adj[Diagenode_100x_adj$V4 < 100000, ] %>% GenomicRanges::GRanges()

Diagenode_50x_adj = data.frame(Diagenode_50x)
Diagenode_50x_adj = Diagenode_50x_adj[Diagenode_50x_adj$V4 < 400000, ] %>% GenomicRanges::GRanges()

Abcam_ab4729_adj = data.frame(Abcam_ab4729)
Abcam_ab4729_adj = Abcam_ab4729_adj[Abcam_ab4729_adj$V4 < 400000, ] %>% GenomicRanges::GRanges()

Abcam_ab177178_adj = data.frame(Abcam_ab177178)
Abcam_ab177178_adj = Abcam_ab177178_adj[Abcam_ab177178_adj$V4 < 100000, ] %>% GenomicRanges::GRanges()

Covplots

Active Motif

ChIPseeker::covplot(peak = ActiveMotif_adj,
                    weightCol = "V4",
                    title = "CUT&TAG ActiveMotif (1:100) H3K27ac peaks (SEACR)")

Diagenode (1:100)

ChIPseeker::covplot(peak = Diagenode_100x_adj,
                    weightCol = "V4",
                    title = "CUT&TAG Diagenode (1:100) H3K27ac peaks (SEACR)")

Diagenode (1:50)

ChIPseeker::covplot(peak = Diagenode_50x_adj,
                    weightCol = "V4",
                    title = "CUT&TAG Diagenode (1:50) H3K27ac peaks (SEACR)")

Abcam-ab4729

ChIPseeker::covplot(peak = Abcam_ab4729_adj,
                    weightCol = "V4",
                    title = "CUT&TAG Abcam-ab4729 (1:100) H3K27ac peaks (SEACR)")

Abcam-ab177178

ChIPseeker::covplot(peak = Abcam_ab177178_adj,
                    weightCol = "V4",
                    title = "CUT&TAG Abcam-ab177178 (1:100) H3K27ac peaks (SEACR)")

4. ENCODE peak overlap

Using ENCODE narrowPeak file with replicated H3K27ac peaks in K562 (ENCFF044JNJ)

extraCols_narrowPeak = c(signalValue = "numeric", pValue = "numeric", qValue = "numeric", peak = "integer")

ENCODE_narrowPeaks = rtracklayer::import("https://www.encodeproject.org/files/ENCFF044JNJ/@@download/ENCFF044JNJ.bed.gz", format = "BED", extraCols = extraCols_narrowPeak)

Count total C&T peaks falling into ENCODE

Discard values of 0 (corresponding to peak ranges with 0 ENCODE overlaps) and count how many ranges (peaks) remain:

peak_list = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)
total_overlapping_peaks_list = c()

for(i in 1:5){
  overlaps = GenomicRanges::countOverlaps(query = peak_list[[i]], subject = ENCODE_narrowPeaks)
  CT_peaks_in_ENCODE = overlaps[overlaps != 0] %>% length()
  total_overlapping_peaks_list = c(total_overlapping_peaks_list, CT_peaks_in_ENCODE)
}

Total_CT_peaks_falling_into_ENCODE = data.frame(Antibody = sampleList, Total_CT_peaks_falling_into_ENCODE = total_overlapping_peaks_list)
Total_CT_peaks_falling_into_ENCODE

Count total ENCODE peaks falling into C&T

Discard values of 0 (corresponding to peak ranges with 0 C&T overlaps) and count how many ranges (peaks) remain:

peak_list = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)
captured_ENCODE_list = c()

for(i in 1:5){
  overlaps = GenomicRanges::countOverlaps(query = ENCODE_narrowPeaks, subject = peak_list[[i]])
  ENCODE_captured_peaks = overlaps[overlaps != 0] %>% length()
  captured_ENCODE_list = c(captured_ENCODE_list, ENCODE_captured_peaks)
}

Total_captured_ENCODE_peaks = data.frame(Antibody = sampleList, Total_captured_ENCODE_peaks = captured_ENCODE_list)
Total_captured_ENCODE_peaks

Summarize

ENCODE_overlaps = Total_captured_ENCODE_peaks %>% left_join(Total_CT_peaks, by = "Antibody") %>% left_join(Total_CT_peaks_falling_into_ENCODE, by = "Antibody")

ENCODE_overlaps$Percentage_of_CT_peaks_falling_into_ENCODE = ENCODE_overlaps$Total_CT_peaks_falling_into_ENCODE / ENCODE_overlaps$Total_CT_peaks * 100
ENCODE_overlaps$Percentage_of_total_ENCODE = ENCODE_overlaps$Total_captured_ENCODE_peaks / length(ENCODE_narrowPeaks) * 100

ENCODE_overlaps = ENCODE_overlaps[c("Antibody", "Total_CT_peaks", "Total_CT_peaks_falling_into_ENCODE", "Percentage_of_CT_peaks_falling_into_ENCODE", "Total_captured_ENCODE_peaks", "Percentage_of_total_ENCODE")]
ENCODE_overlaps

ENCODE peaks captured by C&T (chart)

ENCODE_overlaps$Antibody = factor(ENCODE_overlaps$Antibody, levels = ENCODE_overlaps$Antibody[order(ENCODE_overlaps$Percentage_of_total_ENCODE)])

#x_labels = c("Active Motif (1:100)", "Diagenode (1:100)", "Diagenode (1:50)", "Abcam 177178 (1:100)", "Abcam 4729 (1:100)")

ENCODE_overlap_plot = ggplot(ENCODE_overlaps) +
  geom_col(aes(x = Antibody, y = 100), fill = "grey90") +
  geom_col(aes(x = reorder(Antibody, Percentage_of_total_ENCODE), y = Percentage_of_total_ENCODE, fill = Antibody), show.legend = FALSE) +
  ylim(0, 100) +
  ylab("% of ENCODE peaks captured") +
  xlab("Antibody") +
  #scale_x_discrete(labels = x_labels) +
  ggtitle("ENCODE peaks captured by C&T") +
  theme_bw()
  
ENCODE_overlap_plot

Visualize overlaps

Abcam-ab4729
Abcam_ab4729_df = data.frame(Abcam_ab4729)
Abcam_ab4729_IRanges = IRanges(start = Abcam_ab4729_df$start, Abcam_ab4729_df$end)

ENCODE_narrowPeaks_df = data.frame(ENCODE_narrowPeaks)
ENCODE_narrowPeaks_IRanges = IRanges(start = ENCODE_narrowPeaks_df$start, ENCODE_narrowPeaks_df$end)

GenometriCorr::VisualiseTwoIRanges(Abcam_ab4729_IRanges, ENCODE_narrowPeaks_IRanges, nameA = "Abcam_ab4729", nameB = "ENCODE_narrowPeaks")

Diagenode_50x
Diagenode_50x_df = data.frame(Diagenode_50x)
Diagenode_50x_IRanges = IRanges(start = Diagenode_50x_df$start, Diagenode_50x_df$end)

GenometriCorr::VisualiseTwoIRanges(Diagenode_50x_IRanges, ENCODE_narrowPeaks_IRanges, nameA = "Diagenode_50x", nameB = "ENCODE_narrowPeaks")

ActiveMotif
ActiveMotif_df = data.frame(ActiveMotif)
ActiveMotif_IRanges = IRanges(start = ActiveMotif_df$start, ActiveMotif_df$end)

GenometriCorr::VisualiseTwoIRanges(ActiveMotif_IRanges, ENCODE_narrowPeaks_IRanges, nameA = "Diagenode_50x", nameB = "ENCODE_narrowPeaks")

Proportion of C&T peaks falling into ENCODE (chart)

ENCODE_overlaps$Antibody = factor(ENCODE_overlaps$Antibody, levels = ENCODE_overlaps$Antibody[order(ENCODE_overlaps$Percentage_of_total_ENCODE)])

#x_labels = c("Active Motif (1:100)", "Diagenode (1:100)", "Diagenode (1:50)", "Abcam 177178 (1:100)", "Abcam 4729 (1:100)")

Peaks_in_ENCODE_plot = ggplot(ENCODE_overlaps) +
  geom_col(aes(x = Antibody, y = 100), fill = "grey90") +
  geom_col(aes(x = reorder(Antibody, Percentage_of_total_ENCODE), y = Percentage_of_CT_peaks_falling_into_ENCODE, fill = Antibody), show.legend = FALSE) +
  ylim(0, 100) +
  ylab("% of sample peaks falling into ENCODE") +
  xlab("Antibody") +
  #scale_x_discrete(labels = x_labels) +
  ggtitle("Proportion of C&T peaks falling into ENCODE") +
  theme_bw()
  
Peaks_in_ENCODE_plot

5. Average TSS peak profiles

promoters = ChIPseeker::getPromoters(TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, upstream = 3000, downstream = 3000)
peak_files = list("Active Motif (1:100)" = ActiveMotif_path,
                  "Diagenode (1:100)" = Diagenode_100x_path,
                  "Diagenode (1:50)" = Diagenode_50x_path,
                  "Abcam 177178 (1:100)" = Abcam_ab177178_path,
                  "Abcam 4729 (1:100)" = Abcam_ab4729_path,
                  "ENCODE narrow peaks" = ENCODE_narrowPeaks)

tagMatrixList = lapply(peak_files, getTagMatrix, windows = promoters)

ChIPseeker::plotAvgProf(tagMatrixList, xlim = c(-3000, 3000), conf = 0.95, resample = 500, facet = "row")
## >> Running bootstrapping for tag matrix...        2021-04-25 19:23:33 
## >> Running bootstrapping for tag matrix...        2021-04-25 19:24:23 
## >> Running bootstrapping for tag matrix...        2021-04-25 19:25:37 
## >> Running bootstrapping for tag matrix...        2021-04-25 19:26:30 
## >> Running bootstrapping for tag matrix...        2021-04-25 19:28:04 
## >> Running bootstrapping for tag matrix...        2021-04-25 19:30:10

6. Peak heatmaps

tagHeatmap(tagMatrixList, xlim = c(-3000, 3000), color = NULL)

7. Annotation

peakAnnoList = lapply(peak_files, annotatePeak, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, tssRegion = c(-3000, 3000), verbose = FALSE)

ChIPseeker::plotAnnoBar(peakAnnoList)

8. Distance to TSS

ChIPseeker::plotDistToTSS(peakAnnoList)

9. Enrichment analysis

genes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
names(genes) = sub("_", "\n", names(genes))
compKEGG = clusterProfiler::compareCluster(geneCluster = genes,
                                           fun = "enrichKEGG",
                                           pvalueCutoff = 0.05,
                                           pAdjustMethod = "BH")
clusterProfiler::dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")

10. Gene overlaps

Without Active Motif

peaks = list("Diagenode (1:100)" = Diagenode_100x,
             "Diagenode (1:50)" = Diagenode_50x,
             "Abcam 177178 (1:100)" = Abcam_ab177178,
             "Abcam 4729 (1:100)" = Abcam_ab4729)

peakAnnoList = lapply(peaks, annotatePeak, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, tssRegion = c(-3000, 3000), verbose = FALSE)

genes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
vennplot(genes)

11. Peak overlaps

Without Active Motif

peaks = list("Diagenode (1:100)" = GenomicRanges::GRanges(Diagenode_100x),
             "Diagenode (1:50)" = GenomicRanges::GRanges(Diagenode_50x),
             "Abcam 177178 (1:100)" = GenomicRanges::GRanges(Abcam_ab177178),
             "Abcam 4729 (1:100)" = GenomicRanges::GRanges(Abcam_ab4729))

vennplot(peaks)

12. Motif enrichment

HOMER

##                                                       Motif.Name
## 1           Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer
## 2                 NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer
## 3              CEBP:CEBP(bZIP)/MEF-Chop-ChIP-Seq(GSE35681)/Homer
## 4                Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer
## 5          GATA:SCL(Zf,bHLH)/Ter119-SCL-ChIP-Seq(GSE18720)/Homer
## 6 Oct4:Sox17(POU,Homeobox,HMG)/F9-Sox17-ChIP-Seq(GSE44553)/Homer
##              Consensus P.value Log.P.value q.value..Benjamini.
## 1         HTGCTGAGTCAT   1e-18      -43.11                   0
## 2         GATGACTCAGCA   1e-16      -37.77                   0
## 3 NTNATGCAAYMNNHTGMAAY   1e-16      -37.66                   0
## 4      AWWNTGCTGAGTCAT   1e-12      -28.15                   0
## 5 CRGCTGBNGNSNNSAGATAA   1e-12      -28.02                   0
## 6      CCATTGTATGCAAAT   1e-10      -24.87                   0
##   X..of.Target.Sequences.with.Motif.of.3211. X..of.Target.Sequences.with.Motif
## 1                                        114                             3.55%
## 2                                        121                             3.77%
## 3                                        272                             8.47%
## 4                                        102                             3.18%
## 5                                        208                             6.48%
## 6                                        193                             6.01%
##   X..of.Background.Sequences.with.Motif.of.45156.
## 1                                           609.6
## 2                                           719.1
## 3                                          2238.8
## 4                                           648.9
## 5                                          1729.6
## 6                                          1628.2
##   X..of.Background.Sequences.with.Motif
## 1                                 1.35%
## 2                                 1.60%
## 3                                 4.97%
## 4                                 1.44%
## 5                                 3.84%
## 6                                 3.61%
##                                        Motif.Name            Consensus P.value
## 1    Elk4(ETS)/Hela-Elk4-ChIP-Seq(GSE31477)/Homer           NRYTTCCGGY   1e-74
## 2    Elk1(ETS)/Hela-Elk1-ChIP-Seq(GSE31477)/Homer           HACTTCCGGY   1e-72
## 3                          YY1(Zf)/Promoter/Homer         CAAGATGGCGGC   1e-54
## 4                         ETS(ETS)/Promoter/Homer           AACCGGAAGT   1e-52
## 5 ELF1(ETS)/Jurkat-ELF1-ChIP-Seq(SRA014231)/Homer           AVCCGGAAGT   1e-52
## 6                   GFY-Staf(?,Zf)/Promoter/Homer RACTACAATTCCCAGAAKGC   1e-51
##   Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.8040.
## 1      -170.8                   0                                       3970
## 2      -166.2                   0                                       3967
## 3      -125.2                   0                                        851
## 4      -121.9                   0                                       2446
## 5      -120.7                   0                                       3625
## 6      -119.7                   0                                        618
##   X..of.Target.Sequences.with.Motif
## 1                            49.38%
## 2                            49.34%
## 3                            10.58%
## 4                            30.42%
## 5                            45.09%
## 6                             7.69%
##   X..of.Background.Sequences.with.Motif.of.39503.
## 1                                         15528.8
## 2                                         15568.3
## 3                                          2382.5
## 4                                          9071.4
## 5                                         14517.7
## 6                                          1564.5
##   X..of.Background.Sequences.with.Motif
## 1                                39.31%
## 2                                39.41%
## 3                                 6.03%
## 4                                22.96%
## 5                                36.75%
## 6                                 3.96%
##                                        Motif.Name            Consensus P.value
## 1 Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer      AWWNTGCTGAGTCAT   1e-88
## 2                         ETS(ETS)/Promoter/Homer           AACCGGAAGT   1e-86
## 3                   GFY-Staf(?,Zf)/Promoter/Homer RACTACAATTCCCAGAAKGC   1e-83
## 4                          YY1(Zf)/Promoter/Homer         CAAGATGGCGGC   1e-83
## 5    Elk1(ETS)/Hela-Elk1-ChIP-Seq(GSE31477)/Homer           HACTTCCGGY   1e-77
## 6  NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer         GATGACTCAGCA   1e-74
##   Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.11995.
## 1      -202.9                   0                                         890
## 2      -198.3                   0                                        4787
## 3      -193.2                   0                                        1105
## 4      -191.9                   0                                        1537
## 5      -178.7                   0                                        7099
## 6      -170.9                   0                                         936
##   X..of.Target.Sequences.with.Motif
## 1                             7.42%
## 2                            39.91%
## 3                             9.21%
## 4                            12.82%
## 5                            59.19%
## 6                             7.80%
##   X..of.Background.Sequences.with.Motif.of.35235.
## 1                                          1257.9
## 2                                         11059.8
## 3                                          1736.7
## 4                                          2708.3
## 5                                         17861.8
## 6                                          1444.0
##   X..of.Background.Sequences.with.Motif
## 1                                 3.57%
## 2                                31.38%
## 3                                 4.93%
## 4                                 7.68%
## 5                                50.68%
## 6                                 4.10%
##                                             Motif.Name       Consensus P.value
## 1       NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer    GATGACTCAGCA  1e-111
## 2 Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer    HTGCTGAGTCAT   1e-96
## 3      Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer AWWNTGCTGAGTCAT   1e-94
## 4     Jun-AP1(bZIP)/K562-cJun-ChIP-Seq(GSE31477)/Homer    GATGASTCATCN   1e-91
## 5        Gata1(Zf)/K562-GATA1-ChIP-Seq(GSE18829)/Homer      SAGATAAGRV   1e-91
## 6    Bach2(bZIP)/OCILy7-Bach2-ChIP-Seq(GSE44420)/Homer      TGCTGAGTCA   1e-88
##   Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.16900.
## 1      -256.0                   0                                        1085
## 2      -222.8                   0                                         912
## 3      -218.4                   0                                         889
## 4      -210.9                   0                                        3258
## 5      -210.5                   0                                        5697
## 6      -203.5                   0                                        2837
##   X..of.Target.Sequences.with.Motif
## 1                             6.42%
## 2                             5.39%
## 3                             5.26%
## 4                            19.27%
## 5                            33.69%
## 6                            16.78%
##   X..of.Background.Sequences.with.Motif.of.33092.
## 1                                          1002.7
## 2                                           828.0
## 3                                           805.7
## 4                                          4509.5
## 5                                          8809.8
## 6                                          3832.2
##   X..of.Background.Sequences.with.Motif
## 1                                 3.03%
## 2                                 2.50%
## 3                                 2.43%
## 4                                13.62%
## 5                                26.61%
## 6                                11.58%
##                                        Motif.Name            Consensus P.value
## 1                   GFY-Staf(?,Zf)/Promoter/Homer RACTACAATTCCCAGAAKGC  1e-157
## 2                          YY1(Zf)/Promoter/Homer         CAAGATGGCGGC   1e-99
## 3                         ETS(ETS)/Promoter/Homer           AACCGGAAGT   1e-80
## 4 Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer      AWWNTGCTGAGTCAT   1e-79
## 5  Ronin(THAP)/ES-Thap11-ChIP-Seq(GSE51522)/Homer RACTACAACTCCCAGVAKGC   1e-72
## 6  NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer         GATGACTCAGCA   1e-68
##   Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.13519.
## 1      -362.3                   0                                        1556
## 2      -228.0                   0                                        1959
## 3      -185.8                   0                                        5838
## 4      -182.5                   0                                        1097
## 5      -165.9                   0                                         848
## 6      -158.2                   0                                        1193
##   X..of.Target.Sequences.with.Motif
## 1                            11.51%
## 2                            14.49%
## 3                            43.17%
## 4                             8.11%
## 5                             6.27%
## 6                             8.82%
##   X..of.Background.Sequences.with.Motif.of.33620.
## 1                                          1859.2
## 2                                          2987.6
## 3                                         11843.8
## 4                                          1480.4
## 5                                          1075.9
## 6                                          1734.5
##   X..of.Background.Sequences.with.Motif
## 1                                 5.53%
## 2                                 8.88%
## 3                                35.22%
## 4                                 4.40%
## 5                                 3.20%
## 6                                 5.16%

MACS2

1. Import peaks

peakdir_M = "/Volumes/la420/home/CUTnTAG/antibodies/MACS2"

Diagenode_50x_path = file.path(peakdir_M, "Diagenode_50x_q0.05_peaks.narrowPeak")
Diagenode_50x = ChIPseeker::readPeakFile(Diagenode_50x_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)

Abcam_ab4729_path = file.path(peakdir_M, "Abcam-ab4729_q0.05_peaks.narrowPeak")
Abcam_ab4729 = ChIPseeker::readPeakFile(Abcam_ab4729_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)

Diagenode_100x_path = file.path(peakdir_M, "Diagenode_100x_q0.05_peaks.narrowPeak")
Diagenode_100x = ChIPseeker::readPeakFile(Diagenode_100x_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)

ActiveMotif_path = file.path(peakdir_M, "ActiveMotif_q0.05_peaks.narrowPeak")
ActiveMotif = ChIPseeker::readPeakFile(ActiveMotif_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)

Abcam_ab177178_path = file.path(peakdir_M, "Abcam-ab177178_q0.05_peaks.narrowPeak")
Abcam_ab177178 = ChIPseeker::readPeakFile(Abcam_ab177178_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)

2. Peak information

Peak numbers

peakdir_M = "/Volumes/la420/home/CUTnTAG/antibodies/MACS2/"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")

Total_CT_peaks = c()
for(sample in sampleList){
    peakInfo_M = read.table(paste0(peakdir_M, sample, "_q0.05_peaks.narrowPeak"), header = FALSE, fill = TRUE) %>% mutate(width = abs(V3-V2))
    Total_CT_peaks = data.frame(Total_CT_peaks = nrow(peakInfo_M), Antibody = sample) %>% rbind(Total_CT_peaks, .)
 }

Total_CT_peaks %>% dplyr::select(Antibody, Total_CT_peaks)

Peak and sequencing/alignment results

PeaksAlignmentSummary = left_join(Total_CT_peaks, alignDupSummary, by = "Antibody")
PeaksAlignmentSummary = PeaksAlignmentSummary[c("Antibody", "SequencingDepth", "MappedFragNum_hg19", "UniqueFragNum", "DuplicationRate", "Total_CT_peaks")]
PeaksAlignmentSummary
ggplot(PeaksAlignmentSummary, aes(x = SequencingDepth, y = Total_CT_peaks)) + geom_point(aes(size = DuplicationRate)) + geom_text(aes(label = Antibody), nudge_y = 500, hjust = "inward")

Peak widths

samples = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)

for(sample in samples){
  print(GenomicRanges::width(sample) %>% summary())
}
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   178.0   207.0   257.0   321.1   360.0  3558.0 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   183.0   233.0   315.0   368.4   442.0  2198.0 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   206.0   279.0   402.0   503.2   617.0  4230.0 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     193     224     277     328     380    1770 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   223.0   333.0   491.0   610.4   773.0  3967.0

Fraction of reads in peaks

datadir="/Volumes/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729")

inPeakData = c()
for(sample in sampleList){
  peakRes = read.table(paste0(datadir, "/MACS2/", sample, "_q0.05_peaks.narrowPeak"), header = FALSE, fill = TRUE)
  peak.gr = GenomicRanges::GRanges(seqnames = peakRes$V1, IRanges(start = peakRes$V2, end = peakRes$V3), strand = "*")
  bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
  fragment_counts = getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
  inPeakN = counts(fragment_counts)[,1] %>% sum
  inPeakData = rbind(inPeakData, data.frame(inPeakN = inPeakN, Antibody = sample))
}

frip = left_join(alignDupSummary, inPeakData, by = "Antibody") %>% mutate(frip = inPeakN/UniqueFragNum * 100)
frip %>% dplyr::select(Antibody, SequencingDepth, MappedFragNum_hg19, AlignmentRate_hg19, UniqueFragNum, FragInPeakNum = inPeakN, FRiPs = frip)

3. Coverage

Remove largest peaks (in all cases, under 50 peaks per sample):

ActiveMotif_adj = data.frame(ActiveMotif)
ActiveMotif_adj = ActiveMotif_adj[ActiveMotif_adj$V5 < 1500, ] %>% GenomicRanges::GRanges()

Diagenode_100x_adj = data.frame(Diagenode_100x)
Diagenode_100x_adj = Diagenode_100x_adj[Diagenode_100x_adj$V5 < 1500, ] %>% GenomicRanges::GRanges()

Diagenode_50x_adj = data.frame(Diagenode_50x)
Diagenode_50x_adj = Diagenode_50x_adj[Diagenode_50x_adj$V5 < 1500, ] %>% GenomicRanges::GRanges()

Abcam_ab4729_adj = data.frame(Abcam_ab4729)
Abcam_ab4729_adj = Abcam_ab4729_adj[Abcam_ab4729_adj$V5 < 1500, ] %>% GenomicRanges::GRanges()

Abcam_ab177178_adj = data.frame(Abcam_ab177178)
Abcam_ab177178_adj = Abcam_ab177178_adj[Abcam_ab177178_adj$V5 < 1500, ] %>% GenomicRanges::GRanges()

Covplots

Active Motif

ChIPseeker::covplot(peak = ActiveMotif_adj,
                    weightCol = "V5",
                    title = "CUT&TAG ActiveMotif (1:100) H3K27ac peaks (MACS2)")

Diagenode (1:100)

ChIPseeker::covplot(peak = Diagenode_100x_adj,
                    weightCol = "V5",
                    title = "CUT&TAG Diagenode (1:100) H3K27ac peaks (MACS2)")

Diagenode (1:50)

ChIPseeker::covplot(peak = Diagenode_50x_adj,
                    weightCol = "V5",
                    title = "CUT&TAG Diagenode (1:50) H3K27ac peaks (MACS2)")

Abcam-ab4729

ChIPseeker::covplot(peak = Abcam_ab4729_adj,
                    weightCol = "V5",
                    title = "CUT&TAG Abcam-ab4729 (1:100) H3K27ac peaks (MACS2)")

Abcam-ab177178

ChIPseeker::covplot(peak = Abcam_ab177178_adj,
                    weightCol = "V5",
                    title = "CUT&TAG Abcam-ab177178 (1:100) H3K27ac peaks (MACS2)")

4. ENCODE peak overlap

Count total C&T peaks falling into ENCODE

Discard values of 0 (corresponding to peak ranges with 0 ENCODE overlaps) and count how many ranges (peaks) remain:

peak_list = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)
total_overlapping_peaks_list = c()

for(i in 1:5){
  overlaps = GenomicRanges::countOverlaps(query = peak_list[[i]], subject = ENCODE_narrowPeaks)
  CT_peaks_in_ENCODE = overlaps[overlaps != 0] %>% length()
  total_overlapping_peaks_list = c(total_overlapping_peaks_list, CT_peaks_in_ENCODE)
}

Total_CT_peaks_falling_into_ENCODE = data.frame(Antibody = sampleList, Total_CT_peaks_falling_into_ENCODE = total_overlapping_peaks_list)
Total_CT_peaks_falling_into_ENCODE

Count total ENCODE peaks falling into C&T

Discard values of 0 (corresponding to peak ranges with 0 C&T overlaps) and count how many ranges (peaks) remain:

peak_list = list(ActiveMotif, Diagenode_100x, Diagenode_50x, Abcam_ab177178, Abcam_ab4729)
captured_ENCODE_list = c()

for(i in 1:5){
  overlaps = GenomicRanges::countOverlaps(query = ENCODE_narrowPeaks, subject = peak_list[[i]])
  ENCODE_captured_peaks = overlaps[overlaps != 0] %>% length()
  captured_ENCODE_list = c(captured_ENCODE_list, ENCODE_captured_peaks)
}

Total_captured_ENCODE_peaks = data.frame(Antibody = sampleList, Total_captured_ENCODE_peaks = captured_ENCODE_list)
Total_captured_ENCODE_peaks

Summarize

ENCODE_overlaps = Total_captured_ENCODE_peaks %>% left_join(Total_CT_peaks, by = "Antibody") %>% left_join(Total_CT_peaks_falling_into_ENCODE, by = "Antibody")

ENCODE_overlaps$Percentage_of_CT_peaks_falling_into_ENCODE = ENCODE_overlaps$Total_CT_peaks_falling_into_ENCODE / ENCODE_overlaps$Total_CT_peaks * 100
ENCODE_overlaps$Percentage_of_total_ENCODE = ENCODE_overlaps$Total_captured_ENCODE_peaks / length(ENCODE_narrowPeaks) * 100

ENCODE_overlaps = ENCODE_overlaps[c("Antibody", "Total_CT_peaks", "Total_CT_peaks_falling_into_ENCODE", "Percentage_of_CT_peaks_falling_into_ENCODE", "Total_captured_ENCODE_peaks", "Percentage_of_total_ENCODE")]
ENCODE_overlaps

ENCODE peaks captured by C&T (chart)

ENCODE_overlaps$Antibody = factor(ENCODE_overlaps$Antibody, levels = ENCODE_overlaps$Antibody[order(ENCODE_overlaps$Percentage_of_total_ENCODE)])

#x_labels = c("Active Motif (1:100)", "Abcam 177178 (1:100)", "Diagenode (1:100)", "Diagenode (1:50)", "Abcam 4729 (1:100)")

ENCODE_overlap_plot = ggplot(ENCODE_overlaps) +
  geom_col(aes(x = Antibody, y = 100), fill = "grey90") +
  geom_col(aes(x = reorder(Antibody, Percentage_of_total_ENCODE), y = Percentage_of_total_ENCODE, fill = Antibody), show.legend = FALSE) +
  ylim(0, 100) +
  ylab("% of ENCODE peaks captured") +
  xlab("Antibody") +
  #scale_x_discrete(labels = x_labels) +
  ggtitle("ENCODE peaks captured by C&T") +
  theme_bw()
  
ENCODE_overlap_plot

Visualize overlaps

Abcam-ab4729
Abcam_ab4729_df = data.frame(Abcam_ab4729)
Abcam_ab4729_IRanges = IRanges(start = Abcam_ab4729_df$start, Abcam_ab4729_df$end)

ENCODE_narrowPeaks_IRanges = IRanges(start = ENCODE_narrowPeaks_df$start, ENCODE_narrowPeaks_df$end)

GenometriCorr::VisualiseTwoIRanges(Abcam_ab4729_IRanges, ENCODE_narrowPeaks_IRanges, nameA = "Abcam_ab4729", nameB = "ENCODE_narrowPeaks")

Diagenode_50x
Diagenode_50x_df = data.frame(Diagenode_50x)
Diagenode_50x_IRanges = IRanges(start = Diagenode_50x_df$start, Diagenode_50x_df$end)

GenometriCorr::VisualiseTwoIRanges(Diagenode_50x_IRanges, ENCODE_narrowPeaks_IRanges, nameA = "Diagenode_50x", nameB = "ENCODE_narrowPeaks")

ActiveMotif
ActiveMotif_df = data.frame(ActiveMotif)
ActiveMotif_IRanges = IRanges(start = ActiveMotif_df$start, ActiveMotif_df$end)

GenometriCorr::VisualiseTwoIRanges(ActiveMotif_IRanges, ENCODE_narrowPeaks_IRanges, nameA = "Diagenode_50x", nameB = "ENCODE_narrowPeaks")

Proportion of C&T peaks falling into ENCODE (chart)

ENCODE_overlaps$Antibody = factor(ENCODE_overlaps$Antibody, levels = ENCODE_overlaps$Antibody[order(ENCODE_overlaps$Percentage_of_total_ENCODE)])

#x_labels = c("Active Motif (1:100)", "Diagenode (1:100)", "Diagenode (1:50)", "Abcam 177178 (1:100)", "Abcam 4729 (1:100)")

Peaks_in_ENCODE_plot = ggplot(ENCODE_overlaps) +
  geom_col(aes(x = Antibody, y = 100), fill = "grey90") +
  geom_col(aes(x = reorder(Antibody, Percentage_of_total_ENCODE), y = Percentage_of_CT_peaks_falling_into_ENCODE, fill = Antibody), show.legend = FALSE) +
  ylim(0, 100) +
  ylab("% of sample peaks falling into ENCODE") +
  xlab("Antibody") +
  #scale_x_discrete(labels = x_labels) +
  ggtitle("Proportion of C&T peaks falling into ENCODE") +
  theme_bw()
  
Peaks_in_ENCODE_plot

5. Average TSS peak profiles

promoters = ChIPseeker::getPromoters(TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, upstream = 3000, downstream = 3000)
peak_files = list("Active Motif (1:100)" = ActiveMotif_path,
                  "Diagenode (1:100)" = Diagenode_100x_path,
                  "Diagenode (1:50)" = Diagenode_50x_path,
                  "Abcam 177178 (1:100)" = Abcam_ab177178_path,
                  "Abcam 4729 (1:100)" = Abcam_ab4729_path,
                  "ENCODE narrow peaks" = ENCODE_narrowPeaks)

tagMatrixList = lapply(peak_files, getTagMatrix, windows = promoters)

ChIPseeker::plotAvgProf(tagMatrixList, xlim = c(-3000, 3000), conf = 0.95, resample = 500, facet = "row")
## >> Running bootstrapping for tag matrix...        2021-04-25 20:10:56 
## >> Running bootstrapping for tag matrix...        2021-04-25 20:11:37 
## >> Running bootstrapping for tag matrix...        2021-04-25 20:13:18 
## >> Running bootstrapping for tag matrix...        2021-04-25 20:13:43 
## >> Running bootstrapping for tag matrix...        2021-04-25 20:16:17 
## >> Running bootstrapping for tag matrix...        2021-04-25 20:19:45

6. Peak heatmaps

tagHeatmap(tagMatrixList, xlim = c(-3000, 3000), color = NULL)

7. Annotation

peakAnnoList = lapply(peak_files, annotatePeak, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, tssRegion = c(-3000, 3000), verbose = FALSE)

ChIPseeker::plotAnnoBar(peakAnnoList)

8. Distance to TSS

ChIPseeker::plotDistToTSS(peakAnnoList)

9. Enrichment analysis

genes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
names(genes) = sub("_", "\n", names(genes))
compKEGG = clusterProfiler::compareCluster(geneCluster = genes,
                                           fun = "enrichKEGG",
                                           pvalueCutoff = 0.05,
                                           pAdjustMethod = "BH")
clusterProfiler::dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")

10. Gene overlaps

Without Active Motif

peak_files = list("Diagenode (1:100)" = Diagenode_100x,
                  "Diagenode (1:50)" = Diagenode_50x,
                  "Abcam 177178 (1:100)" = Abcam_ab177178,
                  "Abcam 4729 (1:100)" = Abcam_ab4729)

peakAnnoList = lapply(peak_files, annotatePeak, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, tssRegion = c(-3000, 3000), verbose = FALSE)

genes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
ChIPseeker::vennplot(genes)

11. Peak overlaps

Without Active Motif

peaks = list("Diagenode (1:100)" = GenomicRanges::GRanges(Diagenode_100x),
             "Diagenode (1:50)" = GenomicRanges::GRanges(Diagenode_50x),
             "Abcam 177178 (1:100)" = GenomicRanges::GRanges(Abcam_ab177178),
             "Abcam 4729 (1:100)" = GenomicRanges::GRanges(Abcam_ab4729))

vennplot(peaks)

12. Motif enrichment

HOMER

##                                               Motif.Name            Consensus
## 1        ZNF652/HepG2-ZNF652.Flag-ChIP-Seq(Encode)/Homer      TTAACCCTTTVNKKN
## 2 Phox2b(Homeobox)/CLBGA-PHOX2B-ChIP-Seq(GSE90683)/Homer         TTAATTNAATTA
## 3         Ronin(THAP)/ES-Thap11-ChIP-Seq(GSE51522)/Homer RACTACAACTCCCAGVAKGC
## 4                                 Sp1(Zf)/Promoter/Homer         GGCCCCGCCCCC
## 5                          GFY-Staf(?,Zf)/Promoter/Homer RACTACAATTCCCAGAAKGC
## 6   Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer         HTGCTGAGTCAT
##   P.value Log.P.value q.value..Benjamini.
## 1   1e-10      -25.29              0.0000
## 2   1e-09      -20.91              0.0000
## 3   1e-09      -20.87              0.0000
## 4   1e-07      -17.17              0.0000
## 5   1e-06      -15.74              0.0000
## 6   1e-04      -11.00              0.0012
##   X..of.Target.Sequences.with.Motif.of.2618. X..of.Target.Sequences.with.Motif
## 1                                        133                             5.08%
## 2                                        110                             4.20%
## 3                                         42                             1.60%
## 4                                        315                            12.02%
## 5                                         50                             1.91%
## 6                                         30                             1.15%
##   X..of.Background.Sequences.with.Motif.of.43372.
## 1                                          1166.7
## 2                                           969.0
## 3                                           230.4
## 4                                          3841.9
## 5                                           361.7
## 6                                           206.3
##   X..of.Background.Sequences.with.Motif
## 1                                 2.69%
## 2                                 2.24%
## 3                                 0.53%
## 4                                 8.87%
## 5                                 0.84%
## 6                                 0.48%
##                                             Motif.Name       Consensus P.value
## 1        Gata1(Zf)/K562-GATA1-ChIP-Seq(GSE18829)/Homer      SAGATAAGRV   1e-57
## 2        Gata2(Zf)/K562-GATA2-ChIP-Seq(GSE18829)/Homer      BBCTTATCTS   1e-55
## 3       Gata6(Zf)/HUG1N-GATA6-ChIP-Seq(GSE51936)/Homer      YCTTATCTBN   1e-55
## 4 Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer    HTGCTGAGTCAT   1e-54
## 5       NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer    GATGACTCAGCA   1e-54
## 6      Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer AWWNTGCTGAGTCAT   1e-50
##   Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.7953.
## 1      -131.6                   0                                        804
## 2      -128.1                   0                                        864
## 3      -127.8                   0                                       1087
## 4      -125.9                   0                                        153
## 5      -125.3                   0                                        173
## 6      -116.9                   0                                        151
##   X..of.Target.Sequences.with.Motif
## 1                            10.11%
## 2                            10.86%
## 3                            13.67%
## 4                             1.92%
## 5                             2.18%
## 6                             1.90%
##   X..of.Background.Sequences.with.Motif.of.41178.
## 1                                          2285.4
## 2                                          2541.7
## 3                                          3442.5
## 4                                           161.9
## 5                                           206.5
## 6                                           169.2
##   X..of.Background.Sequences.with.Motif
## 1                                 5.55%
## 2                                 6.18%
## 3                                 8.37%
## 4                                 0.39%
## 5                                 0.50%
## 6                                 0.41%
##                                             Motif.Name       Consensus P.value
## 1           p53(p53)/mES-cMyc-ChIP-Seq(GSE11431)/Homer  ACATGCCCGGGCAT  1e-192
## 2       NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer    GATGACTCAGCA  1e-169
## 3      Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer AWWNTGCTGAGTCAT  1e-163
## 4 Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer    HTGCTGAGTCAT  1e-153
## 5     NFE2L2(bZIP)/HepG2-NFE2L2-ChIP-Seq(Encode)/Homer AWWWTGCTGAGTCAT  1e-151
## 6    Bach2(bZIP)/OCILy7-Bach2-ChIP-Seq(GSE44420)/Homer      TGCTGAGTCA  1e-147
##   Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.24167.
## 1      -442.2                   0                                         580
## 2      -391.3                   0                                         680
## 3      -376.2                   0                                         628
## 4      -354.3                   0                                         594
## 5      -348.6                   0                                         538
## 6      -340.2                   0                                        1545
##   X..of.Target.Sequences.with.Motif
## 1                             2.40%
## 2                             2.81%
## 3                             2.60%
## 4                             2.46%
## 5                             2.23%
## 6                             6.39%
##   X..of.Background.Sequences.with.Motif.of.25423.
## 1                                           132.2
## 2                                           198.9
## 3                                           177.7
## 4                                           168.2
## 5                                           142.6
## 6                                           789.9
##   X..of.Background.Sequences.with.Motif
## 1                                 0.52%
## 2                                 0.78%
## 3                                 0.70%
## 4                                 0.66%
## 5                                 0.56%
## 6                                 3.10%
##                                             Motif.Name       Consensus P.value
## 1      Bach1(bZIP)/K562-Bach1-ChIP-Seq(GSE31477)/Homer AWWNTGCTGAGTCAT   1e-79
## 2       NF-E2(bZIP)/K562-NFE2-ChIP-Seq(GSE31477)/Homer    GATGACTCAGCA   1e-77
## 3 Nrf2(bZIP)/Lymphoblast-Nrf2-ChIP-Seq(GSE37589)/Homer    HTGCTGAGTCAT   1e-73
## 4       Gata6(Zf)/HUG1N-GATA6-ChIP-Seq(GSE51936)/Homer      YCTTATCTBN   1e-66
## 5        Gata1(Zf)/K562-GATA1-ChIP-Seq(GSE18829)/Homer      SAGATAAGRV   1e-65
## 6        Gata2(Zf)/K562-GATA2-ChIP-Seq(GSE18829)/Homer      BBCTTATCTS   1e-63
##   Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.7348.
## 1      -183.3                   0                                        194
## 2      -177.8                   0                                        231
## 3      -169.7                   0                                        196
## 4      -153.8                   0                                       1014
## 5      -152.0                   0                                        779
## 6      -145.9                   0                                        826
##   X..of.Target.Sequences.with.Motif
## 1                             2.64%
## 2                             3.15%
## 3                             2.67%
## 4                            13.81%
## 5                            10.61%
## 6                            11.25%
##   X..of.Background.Sequences.with.Motif.of.42500.
## 1                                           195.5
## 2                                           288.6
## 3                                           217.2
## 4                                          3309.5
## 5                                          2313.8
## 6                                          2547.7
##   X..of.Background.Sequences.with.Motif
## 1                                 0.46%
## 2                                 0.68%
## 3                                 0.51%
## 4                                 7.84%
## 5                                 5.48%
## 6                                 6.04%
##                                          Motif.Name      Consensus P.value
## 1        p53(p53)/mES-cMyc-ChIP-Seq(GSE11431)/Homer ACATGCCCGGGCAT   0e+00
## 2     Gata2(Zf)/K562-GATA2-ChIP-Seq(GSE18829)/Homer     BBCTTATCTS   0e+00
## 3    Gata4(Zf)/Heart-Gata4-ChIP-Seq(GSE35151)/Homer     NBWGATAAGR   0e+00
## 4 FOXP1(Forkhead)/H9-FOXP1-ChIP-Seq(GSE31006)/Homer   NYYTGTTTACHN  1e-302
## 5    Gata6(Zf)/HUG1N-GATA6-ChIP-Seq(GSE51936)/Homer     YCTTATCTBN  1e-286
## 6     Gata1(Zf)/K562-GATA1-ChIP-Seq(GSE18829)/Homer     SAGATAAGRV  1e-270
##   Log.P.value q.value..Benjamini. X..of.Target.Sequences.with.Motif.of.30028.
## 1      -884.3                   0                                        1063
## 2      -774.2                   0                                        5936
## 3      -754.8                   0                                        8176
## 4      -697.7                   0                                        4284
## 5      -659.2                   0                                        7279
## 6      -622.7                   0                                        5194
##   X..of.Target.Sequences.with.Motif
## 1                             3.54%
## 2                            19.75%
## 3                            27.20%
## 4                            14.25%
## 5                            24.22%
## 6                            17.28%
##   X..of.Background.Sequences.with.Motif.of.29743.
## 1                                           208.2
## 2                                          3512.9
## 3                                          5373.7
## 4                                          2335.7
## 5                                          4774.7
## 6                                          3129.1
##   X..of.Background.Sequences.with.Motif
## 1                                 0.70%
## 2                                11.84%
## 3                                18.12%
## 4                                 7.87%
## 5                                16.10%
## 6                                10.55%